

%%%%%%%%%%%%%%%%%%%%
% Do the same counterfactual analysis using the serial correlation correction



clear
delete('Ryan_');
diary('Ryan_');

cd ../china/data
load hs10_indlist.raw;
indN=length(hs10_indlist);


MC=1000;

rand_seed = 100;
rand('seed',rand_seed);
rng(0,'twister');


for w=1:50

folder=sprintf('/china/estimation/hs10/biggest/raw_%d',hs10_indlist(w,1))


Name='Ryan is Great';

tic
addpath('/apps/tomlab');
startup

cd (folder)


load cities.raw;
load imp.raw;
load citydiff.raw;
load statepts.raw;
load price_minyear.raw;
load price_maxyear.raw;
load firmshare.raw;


load beta_newq_biggest.mat;
load data2_q_biggest.mat;
load EV_newq.mat;
%load EV_counter_final2.mat;


X=length(price_minyear);
N=length(statepts)-1;
M=length(imp);
C=length(citydiff);
S=N*X*X

EV_counter_final2=zeros(S,1);

% Which state was each importer in?
state=zeros(M,1);
for i=1:M
	state(i,1)=N*(imp(i,2)-1) + imp(i,1);
end

I1=imp(:,3)';

Pprice_mean=zeros(5,MC);
Pprice_med=zeros(5,MC);
Pswitchpct=zeros(5,MC);
Pcityswitchpct=zeros(5,MC);

beta_p_true = beta_p_sol;
beta_x_true = beta_x_sol;
beta_c_true = beta_c_sol;
xi_tilde_true = xi_tilde_sol;

for k=1:2 


if k==1 
beta_p = beta_p_true;
beta_x = beta_x_true*(-4.248/-2.608);
beta_c = beta_c_true*(-1.931/-1.332);
xi_tilde = xi_tilde_sol;
end

if k==2
beta_p = beta_p_true; 
beta_x = (beta_x_true*(-4.248/-2.608))/2;
beta_c = (beta_c_true*(-1.931/-1.332))/2;
xi_tilde = xi_tilde_sol;
end



nExp=X;
nImp=M;
delta=0.975;




xm0=(1:nExp);
xm0=repmat(xm0,N,1);
xm0=xm0(:);
xm0=repmat(xm0,nExp,1);


xm1=(1:nExp);
xm1=repmat(xm1,N*nExp,1);
xm1=xm1(:);

switched=ones(S,1);
for i=1:S
    if xm1(i,1)==xm0(i,1)
        switched(i,1)=0;
    end
end

% Connect on the cities using our city information.
c=zeros(S,1);
cm1=zeros(S,1);
for i=1:S
    for j=1:nExp
        if xm0(i,1)==j
            c(i,1)=cities(j,1);
        end
        if xm1(i,1)==j
            cm1(i,1)=cities(j,1);
        end
    end
end

% City switching indicator
city_switched=zeros(S,1);
for i=1:S
    if cm1(i,1)~=c(i,1)
        city_switched(i,1)=1;
    end
end

%randn('seed',0);
%ep=randn(S,1);
%ep=abs(ep);

%ep=ep.*0.6;




% S is the total number of states
S=N*nExp*nExp;

EV=tom('EV',S,1);

U=beta_p*ep + beta_x*switched + beta_c*city_switched + xi_tilde*lambda;

% EV is a tomSym object, so cannot write:
% for i=1:S CSVF(i,1) = U(i,1) + delta*EV(i,1)

CSVF= U + delta*EV;


index=ones(nExp,1)';
for i=1:nExp
    index(1,i)=N*nExp*(i-1)+1;
end

index=repmat(index,N,1);
index=index(:);
index=repmat(index,nExp,1);

% index is a vector that tells us what the first number of EV calculation
% is for each element of EV.

%TransProbs=rand(N*N*nExp*nExp); Too big!

A=zeros(nExp,N,S);

for i=1:S
    B = (index(i,1):index(i,1)+N*nExp-1)';
    B = reshape(B,N,nExp);
    A(:,:,i) = B';
end
% Each A is a (nExp x N) matrix arranged so as to calculate EV
% For example EV(1) is a double sum: sum of  sum of CSVF(1),(6),(11);
% sum of CSVF(2),(7),(12); etc.

W=tomArray(CSVF,[N*X X]);
W=repmat(W,N,1);
W=W(:);
W=repmat(W,X,1);
W=tomArray(W,[N X N*X*X]);
Y=  log( sum( exp( W),2) );
TransProbs=reshape(TransProbs,N,N*X*X);
Z=sum( Y.*TransProbs,1);
Z=Z(:);

objective = 0;

% Too bad this double-sum code doesn't work, as tomSym only works with 2D
% matrices.
% C = rand(1,N,S);
% E = sum ( log ( sum  ( exp (CSVF( A) ) ) ).*C );
% E = E(:);
% constraints = {EV == E};

constraints = {EV==Z};

%EV(i) == sum ( log ( sum ( exp( CSVF(B) ) )  ).*TransProbs(C)'  ); 


% Here's some practice so you remember what you are doing (use CTRL-T or CTRL-R):
% CSVF2=randn(15,1)
% A=[1 2 3 4 5; 6 7 8 9 10; 11 12 13 14 15]
% CSVF2(A)
% sum ( CSVF2(A))
% log(sum(exp(CSVF2(A))))
% Note this last vector is a row vector, so the TransProbs must also be a
% row vector.

if k==1
guess = struct('EV',EV_sol);
end

if k==2
guess = struct('EV',2*EV_sol);
end

options = struct;
options.name = Name;
options.prilev=3;
options.use_H=false;
options.use_d2c=false;


Prob = sym2prob('minlp',objective,constraints,guess,options);

%Prob.DUNDEE.optPar(20) = 1;
Prob.KNITRO.options.Feastol_Abs=0.0001;
Prob.KNITRO.options.Opttol_Abs=0.0001;
Prob.KNITRO.options.MAXIT = 40000;
Prob.PriLevOpt=3;
Result = tomRun('knitro',Prob,2);
%Result = ezsolve(objective,constraints,guess,options);
toc

% getSolution(Result)

EV=Result.x_k;




%% Monte Carlo Replication:

U=beta_p*ep + beta_x*switched + beta_c*city_switched + xi_tilde*lambda;

% EV is a tomSym object, so cannot write:
% for i=1:S CSVF(i,1) = U(i,1) + delta*EV(i,1)

CSVF= U + delta*EV;


index=ones(nExp,1)';
for i=1:nExp
    index(1,i)=N*nExp*(i-1)+1;
end

index=repmat(index,N,1);
index=index(:);
index=repmat(index,nExp,1);

A=zeros(nExp,N,S);

for i=1:S
    B = (index(i,1):index(i,1)+N*nExp-1)';
    B = reshape(B,N,nExp);
    A(:,:,i) = B';
end

% Each A is a (nExp x N) matrix arranged so as to calculate EV
% For example EV(1) is a double sum: sum of  sum of CSVF(1),(6),(11);
% sum of CSVF(2),(7),(12); etc.

Q2 = zeros(nExp, S);
for j=1:nExp
    Q = A(:,:,N*(j-1)+1);
    Q2 (:, (j-1)*nExp*N +1: j*nExp*N ) = repmat(Q,1,nExp);
end
% Q2 is the matrix for adding up to calculate the denominator of P

P0 = exp(CSVF) ./ sum( exp( CSVF(Q2) ) )';


Q3=zeros(nExp,N*nExp);
 for i=1:nExp
     Q3( :, N*(i-1)+1: N*i) = Q2(:,N*nExp*(i-1)+1:N*nExp*(i-1)+N);
 end
 
cumP0=cumsum(P0(Q3));


 % I don't think I care about time periods here- treat every observation in
 % the industry as a separate decision.
 


MC_switch=zeros(nImp,MC);
MC_cswitch=zeros(nImp,MC);
MC_avswitch=zeros(1,MC);
MC_avcswitch=zeros(1,MC);
MC_xt = zeros(nImp, MC);
MC_pt = zeros(nImp, MC);


for kk = 1:MC
   
    Rx  = unifrnd(0, 1, N*nExp, nImp);
    

    st = state';
    Xt = zeros(N*nExp,nImp);
    xt = zeros(1,nImp);
    xtm1 = zeros(1,nImp);
    ct = zeros(1,nImp);
    ctm1=zeros(1,nImp);

	x_ref=(1:nExp);
	x_ref=repmat(x_ref,N,1);
	x_ref=x_ref(:);
	
  for j=1:nImp
	xtm1(:,j)=x_ref(st(:,j));
  end
  for i=1:nImp
      for j=1:nExp
          if xtm1(:,i)==j
              ctm1(:,i)=cities(j,1);
          end
      end
  end

    
    
% The decision made is the exporter chosen

    for s = 1:N*nExp
        for j=1:nImp
            Xt(s,j) = find(Rx(s,j) < cumP0(:,s), 1);
        end
    end
   % xt is the choice they would make for all possible states they are in.
    
   for j=1:nImp
       xt(:,j)=Xt(st(:,j),j);
   end

for i=1:nImp
   for j=1:nExp
        if xt(:,i)==j
            ct(:,i)=cities(j,1);
        end
   end
end




   for i=1:nImp
      if ct(:,i)~=ctm1(:,i)
         MC_cswitch(i,kk)=1;
      end
      if xt(:,i)~=xtm1(:,i)
         MC_switch(i,kk)=1;
      end
   end
  

    xt=xt';
 
   % Xt is the realized exporter decision: given what state they are in (st)
   % where do they go, based on xt.  Each Xt makes up a column of MC observations.
   
    MC_xt(1:nImp,kk) = xt;
    MC_avswitch(:,kk)=mean(MC_switch(:,kk),1);
    MC_avcswitch(:,kk)=mean(MC_cswitch(:,kk),1);
    MC_pt(1:nImp,kk) = exp(price_maxyear( xt));
end 

EV_counter_final2(1:S,k)=EV;

%% (1) Prices
%% Take the mean and median price for each firm over 1000 runs.
%% Compare to the original price.
%% Weight this relative price by the size of the firm's imports.
%% This will give a price index for each industry.
%% Weight this up by trade shares.

%% Due to weirdness in a few of the prices, I will use the following aggregation strategy:
%% Mean across runs
%% Median across firms (not in this code, so easy to do weighted average in later code as well)
%% Weighted average across industries.

Pprice_mean(1:nImp,k)=mean(MC_pt,2);
Pprice_med(1:nImp,k)=median(MC_pt,2);

%% I can also save the median across all firms 1000 times and make distributions to show for selected industries.

Medprice_firms(1:MC,k)=median(MC_pt,1)';
Aprice_firms(1:MC,k)=mean(MC_pt,1)';
Wprice_firms=zeros(nImp,MC);
for i=1:MC
	Wprice_firms(1:nImp,i)=(MC_pt(1:nImp,i).*firmshare(1:nImp,1));
end
WAprice_firms(1:MC,k)=sum(Wprice_firms,1)';


%% (2) Switching
%% Save the mean and median switching percent over the thousand runs.
%% Compare to true switching percent.
%% Aggregate out to see how much overall is switching.

%Pswitchpct_mean(1,k)=mean(MC_avswitch);
%Pswitchpct_med(1,k)=median(MC_avswitch);
%Pswitchpct_sd(1,k)=sqrt(var(MC_avswitch));


%% I can also save the total distribution to show for selected industries (this will only save the cf, not original).

MC_avswitch_k(1:MC,k)=MC_avswitch';


%% (3) City Switching
%% Save the mean and median city switching percent over the thousand runs.
%% Compare to true city switching percent.
%% Aggregate out to see how much overall is city switching.

%Pcswitchpct_mean(1,k)=mean(MC_avcswitch);
%Pcswitchpct_med(1,k)=median(MC_avcswitch);
%Pcswitchpct_sd(1,k)=sqrt(var(MC_avcswitch));

%% I can also save the total distribution to show for selected industries (this will only save the cf, not original).

MC_avcswitch_k(1:MC,k)=MC_avcswitch';

w
k

end % ends the k loop, over the 5 (actually only 2 here) counterfactuals

% How well do I match the actual price paid?
priceratio1(1:nImp,1)=Pprice_med(1:nImp,1)./ exp(price_maxyear(I1));

% Here I am comparing to the estimated prices (k=1, 1000 MC runs)
priceratio2(1:nImp,1)=Pprice_med(1:nImp,2)./ Pprice_med(1:nImp,1);

% But I also compare to just the actual price they paid
priceratio3(1:nImp,1)=Pprice_med(1:nImp,2)./ exp(price_maxyear(I1));




priceratio=[priceratio1 priceratio2 priceratio3];
save('priceratio_final','priceratio');


save('Medprice_firms','Medprice_firms');
save('Aprice_firms','Aprice_firms');
save('WAprice_firms','WAprice_firms');


MC_switch=[MC_avswitch_k MC_avcswitch_k];
% This dataset is a 1000 x 4 matrix: [switching % in gen. data, switching % in changed data, city, city]
save('MC_switch','MC_switch');


%Pswitchpct=[Pswitchpct_mean Pswitchpct_med Pswitchpct_sd] 
%Pcswitchpct=[Pcswitchpct_mean Pcswitchpct_med Pcswitchpct_sd] 
%All_switching=[Pswitchpct Pcswitchpct];
%save('All_switching','All_switching');


city=zeros(nImp,2);
for i=1:nImp
   for j=1:nExp
        if imp(i,2)==j
            city(i,1)=cities(j,1); % past city
        end
   end
end
for i=1:nImp
   for j=1:nExp
        if imp(i,3)==j
            city(i,2)=cities(j,1); % future city
        end
   end
end

dataswitch=zeros(nImp,1);
datacswitch=zeros(nImp,1);

for i=1:nImp
      if city(i,1)~=city(i,2)
         datacswitch(i,1)=1;
      end
      if imp(i,2)~=imp(i,3)
         dataswitch(i,1)=1;
      end
   end



truth_switch=mean(dataswitch);
truth_cswitch=mean(datacswitch);

truth_switching=[truth_switch truth_cswitch];
save('truth_switching','truth_switching');

%save('EV_counter_final2','EV_counter_final2');

save('CF1_halfcost_biggest_sc','EV_counter_final2','priceratio','Medprice_firms','WAprice_firms','MC_switch','truth_switching');

w

end % ends the w loop over each industry

%% I have saved everything at the industry level now!  So it should be very easy to go back if I want to change.
%% Here is a list of all the things I have saved:
	%% The computed EV for each cf, so if I want to go back, it is easy to re-create.
	%% 3 price ratios (cf2 vs cf1, cf2 vs truth, cf1 vs truth)-> One for each importer in each industry. Name: priceratio
	%% Median price across firms (cf1 vs cf2), 1000 times, for each industry -> Can show illustrative industry. Name: Medprice_firms.
	%% 1000 runs of switching and city switching (cf1 vs cf2) -> Easy to create mean, median, SD, since I am just saving the entire disn for each industry.  .  Name: MC_switch.
	%% True switching and city switching (1 number for each). Name: truth_switching

cd ../china/second_round/estimation/

diary off;





